/*
CLEANING RAMS DATA

Data 	: A0_RAMS_85_19.dta (input), saved as A1_cleaned_RAMS.dta (output)
Folder 	: 
Date	: 2018-01-25

Creator		: Jonas Cederlof (JC)
Description 	: Fixing duplicates and expanding RAMS data 

Notes 		: Data only includes individuals eventually getting notified. 
LATEST UPDATE 	: 
*/

*===============================================================================

* Session setup: close any log left open by an earlier run, switch off output
* paging and empty the workspace before anything is loaded.
capture log close _all
set more off
clear

* Start the log for this run (overwritten each time) and load the RAMS data.
log using "../log/A1_clean_RAMS.log", replace
use "$datapath/A0_RAMS_85_19.dta"



{ // First look at the data
********************************************************************************
* Optional descriptive checks of the freshly loaded data. Every command below
* is commented out, so this block currently does nothing; remove the leading
* asterisk on a line to run that check.
*count
*desc
*sum, format
*misstable sum
}
*
{ // Renaming & Labeling variables
********************************************************************************
* Working names: one grouped rename, old names in the first list and new names
* in the second, matched by position.
rename (lopnr  lopnr_peorgnr lopnr_cfarnr manfran  mantill ) ///
       (persid firmid        plantid      fi_month la_month)

* Variable labels: identifiers, spell months and workplace characteristics
label variable persid      "ID: individual"
label variable firmid      "ID: firm"
label variable plantid     "ID: plant"
label variable fi_month    "First month of employment in the year"
label variable la_month    "Last month of employment in the year"
label variable astnr       "Workplace number (RAMS)"
label variable astkommun   "Workplace municiplaity"
label variable astlan      "Workplace county"
label variable astsni02    "Workplace industry (SNI02)"

* Variable labels: earnings, taxes, occupational status and firm ownership
label variable lonfink     "Total salary during the year (SEK)"
label variable skatt       "Total taxes during a year (SEK)"
label variable yrkstallnku "Koder som visar yrkesstallningen"
label variable forman      "Taxable job perks (SEK) (Forman)"
label variable agkat       "Foretagets agarkategori"
label variable statusf     "Status for egna foretagare A=ktiv P=passiv?"

* Convert the code variables to numeric so value labels can be attached
destring yrkstallnku agkat astnr, replace

* Value labels: occupational status
label define yrkstallnku ///
	0 "No info" ///
	1 "Seaworker" ///
	2 "Other employed" ///
	4 "Self-employed" ///
	5 "Self-employed (AB)"

* Value labels: ownership category of the firm
label define agkat ///
	0  "Unkown" ///
	1  "Statligt agande" ///
	2  "Kommunalt agande" ///
	3  "Other ownership" ///
	9  "?" ///
	10 "Statligt kontrollerade enheter" ///
	20 "Kommunalt kontrollerade enheter" ///
	30 "Landstingskontrollerade enheter" ///
	41 "Privat SWE enheter utan koncerntillhorighet" ///
	42 "Privat SWE enheter med koncerntillhorighet" ///
	50 "Utlandskontrollerade"

* Attach each label set to the variable of the same name
label values yrkstallnku yrkstallnku
label values agkat       agkat
}
*
{ // Dropping variables
********************************************************************************
* Remove the workplace municipality and county, taxes, job perks and the
* self-employment status variable; none is used in the remainder of this file.
drop astkommun ///
     astlan    ///
     skatt     ///
     forman    ///
     statusf
}
*
{ // Correcting variables fi_month and la_month
********************************************************************************
* Problem (original author notes): some yearly spells are registered as having
* started in month 0 and ended in month 0. For the self-employed this is always
* the case by construction of the data. The remaining cases have positive
* earnings and paid taxes, so they are treated as measurement error, i.e. as
* missing months. This affects about 1.3% of all yearly spells, and less so in
* later years.
* NOTE(review): the original comment identified the self-employed as
* yrkstallnku==2, but the value labels defined in this file map 4 and 5 to
* self-employed and 2 to other employed. Confirm which code is meant.

* Check that month 0 always occurs jointly in both variables
tab 	 la_month  	   fi_month 	 ,missing

* Count zero-month spells with positive earnings. Missing values compare as
* larger than any number in Stata, so missing earnings are excluded explicitly;
* otherwise they would be counted as positive.
count if la_month==0 & fi_month==0 & lonfink>0 & !missing(lonfink)

{ 	// Look for a pattern for when a "zero-year" happens ***
	*===================================================================
	* Tag the zero-year observations
	bys persid firmid  year  : gen  tag = 1 if fi_month==0 & la_month==0

	* For tagged rows: last month of this row plus the last month of the next
	* (diffmonth) or previous (diffmonth2) row of the same person-firm pair,
	* ordered by year. A value of 12 means the neighbouring spell ran until
	* December, a value of 0 means the neighbour is also a zero-year.
	bys persid firmid (year) : gen  diffmonth  = la_month + la_month[_n+1] if tag==1
	bys persid firmid (year) : gen  diffmonth2 = la_month + la_month[_n-1] if tag==1

	tab2 diffmonth*
	tab  diffmonth
}		
* Conclusion (original author): in almost all cases the zero-years come in
* runs (several in a row) or are followed or preceded by a yearly spell that
* lasted until month 12. They are therefore assumed to be full-year spells
* and are set to fi_month==1 and la_month==12. The affected person-firm pairs
* are flagged so that robustness to this assumption can be evaluated.

* Flag: mark rows with month 0 or 99 in either variable, then spread the flag
* to all rows of the same person-firm pair.
* NOTE(review): fastmax is a user-written command; it is presumed to store the
* group maximum of temp in flag_work. Confirm against its help file.
gen 	 temp = la_month==0 |la_month==99 | fi_month==0 | fi_month==99 
fastmax  temp , by(persid firmid) name(flag_work)
lab var  flag_work 	"Flag: ==1 if 'missing' months observations in work register"

* Replace: codes 0 and 99 become month 1 for the first month and month 12 for
* the last month
replace fi_month = 1 if fi_month==0
replace fi_month = 1 if fi_month==99

replace la_month = 12 if la_month==0 
replace la_month = 12 if la_month==99

* Drop only the helper variables created in this block. They are listed by
* name so that no other variable beginning with diff can be removed by accident.
drop diffmonth diffmonth2 tag temp

}
*
{ // Checking for duplicates & collapsing data
********************************************************************************
*duplicates report persid year firmid
*sort persid year firmid

*** Good example to follow what is being done (follow tag==1)
*list persid firmid  year fi_month la_month lonfink if persid==9183, sepby(firmid)
*gen tag = 1 in 6585/6588


* Expanding yearly observations to monthly.
* An individual can have several registrations at the same firm in the same
* year. The rows are therefore numbered within person-year-firm (nvals) before
* expanding, so that the month counter restarts at 1 for every registration
* and never runs past 12 (original author example: month 112).
* Collapsing the yearly rows directly would garble the fi_month and la_month
* indicators, which is why the data are expanded to months first.

* Number the registrations within person-year-firm. The extra sort keys in
* parentheses fix the order of tied rows; without them the sort orders ties
* arbitrarily, so nvals could differ between runs.
* NOTE(review): this matters for the (first) statistics below if fcollapse
* takes the first row in current data order. Confirm against the ftools help.
bys persid year firmid (fi_month la_month lonfink) : gen nvals = _n

* One row per calendar month for every registration
expand  12
bys persid year firmid nvals 	: gen month = _n


* Dummy equal to one if the month lies within the registered spell.
* inrange() already returns 0 or 1, and byte storage keeps the expanded data
* small.
gen byte work = inrange(month, fi_month, la_month)
lab var work 	"Working in month at firmid"

* Collapse to one row per person, firm, year and month.
* lonfink is summed, so earnings from multiple registrations at the same firm
* in the same year are added together. Per the original author this only
* happens when a spell is split in two by the month indicators, e.g. one
* registration covering months 1 to 4 and another covering months 6 to 12.
* work and plantid take the maximum; the remaining variables take the value
* of the first row in each group.
fcollapse (sum) lonfink (max) work plantid  (first) astnr astsni* yrkstallnku agkat ,by(persid firmid year month)

*duplicates report persid firmid year month
}
*

* Monthly date in Stata format, built from the calendar year and month
generate date = ym(year, month)
format %tm date


{ // Labeling variables
********************************************************************************
* Labels are applied again after the collapse.
* NOTE(review): presumably because the collapse does not carry them over;
* confirm.

* Identifiers
label variable persid  "ID: individual"
label variable firmid  "ID: firm"
label variable plantid "ID: plant"
label variable astnr   "Workplace number (RAMS)"

* Workplace industry, one variable per SNI classification version
foreach sni in 69 92 02 07 {
	label variable astsni`sni' "Workplace industry (SNI`sni')"
}

* Earnings, occupational status, ownership, date and work indicator
label variable lonfink     "Total salary during the year (SEK)"
label variable yrkstallnku "Koder som visar yrkesstallningen"
label variable agkat       "Foretagets agarkategori"
label variable date        "Date: Year-Month"
label variable work        "==1 if working in a given month"

* Value labels: defined under capture so that an already existing definition
* does not stop the script
capture label define yrkstallnku ///
	0 "No info" ///
	1 "Seaworker" ///
	2 "Other employed" ///
	4 "Self-employed" ///
	5 "Self-employed (AB)"
capture label define agkat ///
	0  "Unkown" ///
	1  "Statligt agande" ///
	2  "Kommunalt agande" ///
	3  "Other ownership" ///
	9  "?" ///
	10 "Statligt kontrollerade enheter" ///
	20 "Kommunalt kontrollerade enheter" ///
	30 "Landstingskontrollerade enheter" ///
	41 "Privat SWE enheter utan koncerntillhorighet" ///
	42 "Privat SWE enheter med koncerntillhorighet" ///
	50 "Utlandskontrollerade"
label values yrkstallnku yrkstallnku
label values agkat       agkat
}
*

* Discard the variables that are not kept in the saved file
drop agkat astnr astsni69 astsni92

* Shrink storage types and write the cleaned data, overwriting any earlier copy
compress
save "$datapath/A1_cleaned_RAMS.dta", replace

* End the log for this run
log close


